
# Produces Tables 4 in section 4 

# Libraries
library(haven)
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(ggpmisc)
library(broom)
library(data.table) 
library(dplyr)
library(msm)
library(ivreg)
library(gmm)
library(sandwich)
library(readxl)

wkdir <- "/Users/huiyuli/Dropbox/Entry Technology/JPE macro submission/Replication Script"

########## Table 4 gmm results rows 1 ########## 
# Import data on national real gdp, number of firms, and employment
data_gdp <- fread(paste(wkdir, "raw data/df_nationalrealgdp.csv",  sep="/"))
data_bds <- fread(paste(wkdir, "raw data/bds2020.csv",  sep="/"), select = c("year", "firms", "emp"))
data_pop <- fread(paste(wkdir, "raw data/civnoninst.csv",  sep="/"))
data_pop  <- aggregate(as.numeric(data_pop$lf), by=list( data_pop$year), FUN=sum)%>% 
  rename(year = Group.1, pop = x)
data_pop  <- mutate(data_pop, ln_pop = log(pop),)[, c("year","ln_pop")]
data_gdp_bds <- merge(data_bds, data_gdp, by.x = "year", by.y = "year", all.x = TRUE, all.y = FALSE)
data_gdp_bds <-mutate(data_gdp_bds, ln_GDP_worker = log(real_gdp / emp),
                      ln_emp_per_firm = log(emp / firms),
                      ln_emp = log(emp),
                      ln_firms = log(firms))
data_gdp_bds$ln_firms_lagged <- dplyr::lag(data_gdp_bds$ln_firms, n=1, order_by=data_gdp_bds$year)
sigma=4
#data_gdp_bds$ln_A = data_gdp_bds$ln_GDP_worker - 1/(sigma-1)*data_gdp_bds$ln_firms
data_gdp_bds <- merge(data_gdp_bds, data_pop, by.x = "year", by.y = "year", all.x = TRUE, all.y = FALSE)

# row 1
eq_col_gmm_constrained <- gmm(data_gdp_bds$ln_emp_per_firm ~ data_gdp_bds$ln_GDP_worker, ~data_gdp_bds$ln_pop, c(0, 0), eqConst=2, upper=0, lower=-1, vcov='iid')
summary(eq_col_gmm_constrained)
lambda_1 =  1 - 0 # the coef on ln_GDP_worker is at the constraint of 0
se_lambda_1 = NA 


########## Table 4 gmm results rows 2 and 3 ########## 

data_gsp <- fread(paste(wkdir, "raw data/realStateGDP.csv",  sep="/")) %>% 
  rename(real_gsp = rgdp_haver)
data_gsp <- mutate(data_gsp, state_abbrev = toupper(state_abbrev))
data_fips <- fread(paste(wkdir, "raw data/stateFips.csv",  sep="/"))
data_gsp <- merge(data_gsp, data_fips, by.x = "state_abbrev", by.y = "stusps", all.x = TRUE, all.y = TRUE)
data_bds <- fread(paste(wkdir, "raw data/bds2020_st.csv",  sep="/"),  select = c("year", "firms", "emp",'st'))
data_gsp_bds <- merge(data_gsp, data_bds, by.x = c("st", "year"), by.y = c("st","year"), all.x = FALSE, all.y = TRUE)
data_gsp_bds <-mutate(data_gsp_bds, ln_GSP_worker = log(real_gsp / emp),
                      ln_emp_per_firm = log(emp / firms),
                      ln_emp = log(emp),
                      ln_firms = log(firms))

data_gsp_bds <- data_gsp_bds[ data_gsp_bds$state_abbrev != "DC",]
firstyear = min(data_gsp_bds$year)
data_gsp_bds$ln_firms_lag <- dplyr::lag(data_gsp_bds$ln_firms, n=1, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds[data_gsp_bds$year==firstyear, c("ln_firms_lag")]<- NA
data_birth <- read_dta(paste(wkdir, "raw data/Birthrate/birthrates_updated.dta",  sep="/")) %>% 
  rename(st = statefips, birth_rate_per_1000 = birthrate, year_birth = year )
data_birth_1996_2000 <- fread(paste(wkdir, "raw data/Birthrate/Birth rate 1996_2000.csv",  sep="/"), select = c("year_birth", "birth_rate_per_1000","st")) 
data_birth <- rbind(data_birth, data_birth_1996_2000)
data_birth <-mutate(data_birth, year = year_birth + 20)


data_gsp_bds <- merge(data_gsp_bds, data_birth[,c('year','st','birth_rate_per_1000')], by.x = c("st", "year"), by.y = c("st", "year"), all.x = FALSE, all.y = FALSE) #inner merge

sigma = 4
alpha1 = 13/(70 + 13) # 13% is share of structure and 70% is share of labor in value added used in Caliendo et al ReStud 2018
alpha = 1-alpha1
data_gsp_bds$ln_H = data_gsp_bds$ln_emp - alpha/alpha1*data_gsp_bds$ln_GSP_worker 

# row 2
eq_col_gmm_constrained <- gmm(data_gsp_bds$ln_emp_per_firm ~ data_gsp_bds$ln_GSP_worker, ~data_gsp_bds$birth_rate_per_1000, c(0, 0), eqConst=2, upper=0, lower=-1, vcov='iid')
summary(eq_col_gmm_constrained)
lambda_2 =  1 - 0 # the coef on ln_GDP_worker is at the constraint of 0
se_lambda_2 = NA 

# row 3
eq_col_gmm_constrained <- gmm(data_gsp_bds$ln_emp_per_firm ~ data_gsp_bds$ln_GSP_worker + data_gsp_bds$ln_firms_lag, ~data_gsp_bds$birth_rate_per_1000 + data_gsp_bds$ln_H, c(0, 0), eqConst=2, upper=0, lower=-1, vcov='iid')
summary(eq_col_gmm_constrained)
lambda_3 =  1 - 0 # the coef on ln_GDP_worker is at the constraint of 0
se_lambda_3 = NA 
phi_3 = - round(summary(eq_col_gmm_constrained)$coefficients["data_gsp_bds$ln_firms_lag", "Estimate"]
                , digits= 3)
se_phi_3 = round(summary(eq_col_gmm_constrained)$coefficients["data_gsp_bds$ln_firms_lag", "Std. Error"], digits = 3)

